--Question 1
--a. Count how many rows are in your products, countries and sales tables.
--   Each result set gets its own alias so the 2016 and 2017 sales counts
--   can be told apart in the output.
USE OLAP
SELECT COUNT(*) AS number_of_countries FROM Countries
SELECT COUNT(*) AS number_of_products FROM Products
SELECT COUNT(*) AS number_of_2016_sales FROM Sales_2016
SELECT COUNT(*) AS number_of_2017_sales FROM Sales_2017

--b. Make sure our OLTP database has the new set of tables we set up for this chapter.
--   You can do this by counting the number of rows in those tables.
SELECT COUNT(*) AS number_of_new_countries FROM OLTP.dbo.Countries
SELECT COUNT(*) AS number_of_new_products FROM OLTP.dbo.Products
SELECT COUNT(*) AS number_of_new_sales FROM OLTP.dbo.Sales

--c. Modify your ETL procedure to make sure that it doesn't load any rows
--that already exist in the production tables.
go
ALTER PROCEDURE ETL @replace bit = null as
/* Extract, transform and load data from the OLTP database into the OLAP
   production tables (countries, products, sales_2016, sales_2017).

   Steps:
     1. Copy OLTP.dbo.Countries / Products / Sales into global temp tables.
     2. Clean the staged data (drop columns, fix datatypes, trim, correct
        known misspellings, fill down product_line, remove unwanted rows).
     3. Optionally truncate the production tables.
     4. Insert only the staged rows that are not already present in the
        production tables (EXCEPT against the current contents).

   parameter:
   @replace - values 0 or null will not replace existing rows in the production
              tables; new rows are appended instead.
            - a value of 1 will truncate the production tables before loading.

   NOTE: the GO after the final END is required. Without it everything up to
   the end of the batch would be compiled into this procedure.
*/
BEGIN
    --Create the temporary (staging) tables
    DROP TABLE IF EXISTS ##Countries
    DROP TABLE IF EXISTS ##Products
    DROP TABLE IF EXISTS ##Sales

    SELECT * INTO ##Countries FROM OLTP.dbo.Countries
    SELECT * INTO ##Products FROM OLTP.dbo.Products
    SELECT * INTO ##Sales FROM OLTP.dbo.Sales

    --Transform the data
        --Dropping columns
        ALTER TABLE ##Countries
            DROP COLUMN F3,F4,F5,F6,F7,F8,F9

        --Blank rows in sales
        DELETE FROM ##Sales
        WHERE [Date] IS NULL

        --Changing datatypes
            --Countries table
            ALTER TABLE ##Countries ALTER COLUMN country char(120) --change datatype to char(120)
            ALTER TABLE ##Countries ALTER COLUMN city char(120)    --change datatype to char(120)
            --Products table
            ALTER TABLE ##Products ALTER COLUMN code char(15)          --change size to char(15)
            ALTER TABLE ##Products ALTER COLUMN product_line char(120) --change datatype to char(120)
            ALTER TABLE ##Products ALTER COLUMN product_cost money
            ALTER TABLE ##Products ADD product char(240) --add a new column when we split product and product type
            --Sales table
            ALTER TABLE ##Sales ALTER COLUMN [Date] date
            ALTER TABLE ##Sales ALTER COLUMN Retailer_city char(120)
            ALTER TABLE ##Sales ALTER COLUMN Order_method_type char(120)
            ALTER TABLE ##Sales ALTER COLUMN Urgent char(10)
            ALTER TABLE ##Sales ALTER COLUMN Retailer_type char(120)
            ALTER TABLE ##Sales ALTER COLUMN Product_code char(15)
            ALTER TABLE ##Sales ALTER COLUMN Sale_price money
            ALTER TABLE ##Sales ALTER COLUMN Quantity_sold int

        --Modifying the contents of the tables
            --Sales: retailer city
            --Get rid of surrounding spaces
            UPDATE ##Sales
            SET retailer_city = TRIM(retailer_city)

            --Correct the misspellings 'Lodon' and 'Londonn'
            UPDATE ##Sales
            SET retailer_city = 'London'
            WHERE retailer_city IN ('Lodon', 'Londonn')

            --Handling nulls: a sale without a quantity is discarded
            DELETE FROM ##Sales
            WHERE quantity_sold IS NULL

            --Sales: retailer type
            --Trim the spaces
            UPDATE ##Sales
            SET retailer_type = TRIM(retailer_type)

            --Remove non-printable characters
            UPDATE ##Sales
            SET retailer_type =
                    REPLACE(
                    REPLACE(
                    REPLACE(
                    REPLACE(
                    REPLACE(retailer_type, CHAR(9), '')  --Horizontal tab
                    , CHAR(10), '') --Line feed
                    , CHAR(11), '') --Vertical tab
                    , CHAR(12), '') --Form feed
                    , CHAR(13), '') --Carriage return

            --Expand abbreviated retailer types
            --   "Direct Mark." should be "Direct Marketing"
            UPDATE ##Sales
            SET retailer_type = 'Direct Marketing'
            WHERE retailer_type = 'Direct Mark.'
            --   "Dept. Store" should be "Department Store"
            UPDATE ##Sales
            SET retailer_type = 'Department Store'
            WHERE retailer_type = 'Dept. Store'

        --Fill down: a NULL product_line takes the most recent non-NULL
        --product_line seen earlier by the cursor.
        --NOTE(review): the cursor query has no ORDER BY, so the fill-down relies
        --on the rows being returned in their original load order - confirm.
            BEGIN
                DECLARE @DB_CURSOR AS CURSOR;
                DECLARE @previous_product_line CHAR(120);
                DECLARE @current_product_line CHAR(120);
                BEGIN
                    BEGIN TRANSACTION
                    SET @DB_CURSOR = CURSOR FOR
                        SELECT TRIM(product_line)
                        FROM ##Products
                    OPEN @DB_CURSOR;
                    FETCH NEXT FROM @DB_CURSOR INTO @current_product_line
                    WHILE @@FETCH_STATUS = 0
                    BEGIN
                        IF @current_product_line IS NULL
                            BEGIN
                                --Update the row the cursor is currently positioned on
                                UPDATE ##Products
                                SET product_line = @previous_product_line
                                WHERE CURRENT OF @DB_CURSOR
                            END
                        ELSE
                            BEGIN
                                --Remember the last non-NULL value for the rows that follow
                                SET @previous_product_line = @current_product_line
                            END
                        FETCH NEXT FROM @DB_CURSOR INTO @current_product_line
                    END
                    PRINT 'Finished'
                    COMMIT TRANSACTION
                END;
                CLOSE @DB_CURSOR;
                DEALLOCATE @DB_CURSOR;
            END

        --Getting rid of rows for product lines that are not loaded
        DELETE FROM ##Products
        WHERE product_line IN ('Golf Equipment','Personal Accessories')

    --Loading the data into production tables
        --If @replace is 1 then we remove all the rows in the production tables
        IF @replace = 1
            BEGIN
                TRUNCATE TABLE countries
                TRUNCATE TABLE products
                TRUNCATE TABLE sales_2016
                TRUNCATE TABLE sales_2017
            END

        --Load the data into production tables. Each INSERT uses EXCEPT against
        --the current production contents so rows that already exist are skipped.

            --Insert countries
            --NOTE(review): SELECT * assumes countries holds exactly (country, city)
            --in that order - confirm against the table definition.
            INSERT INTO countries
                SELECT DISTINCT country, city
                FROM ##Countries
                EXCEPT
                SELECT * FROM countries

            --Insert products
            INSERT INTO products(Code, Product_line
                                    ,Product_type
                                    ,Product
                                    ,Product_cost)
            SELECT Code, Product_line
                    ,OLTP.dbo.findstringto([Product_type/Product],'/') --Product_type
                    ,dbo.proper_case(OLTP.dbo.findstringfrom([Product_type/Product],'/')) --Product in proper case
                    ,Product_cost
            FROM ##Products
            EXCEPT
            --Explicit column list matching the INSERT target, so the comparison
            --does not depend on the column order or count of the products table
            SELECT Code, Product_line, Product_type, Product, Product_cost
            FROM products

            --Insert sales into the table for the matching year
            --NOTE(review): SELECT * assumes ##Sales and the sales_YYYY tables share
            --the same column layout - confirm against the table definitions.
            INSERT INTO sales_2016
            SELECT * FROM ##Sales
            WHERE DATEPART(year, [Date]) = 2016
            EXCEPT
            SELECT * FROM sales_2016

            INSERT INTO sales_2017
            SELECT * FROM ##Sales
            WHERE DATEPART(year, [Date]) = 2017
            EXCEPT
            SELECT * FROM sales_2017
END
GO

--d. Test it by running the ETL on the new set of transaction tables
EXEC ETL

--e. Confirm that ETL has added 4 sales to your sales table and 2 cities to your country table
SELECT COUNT(*) AS number_of_countries FROM Countries
SELECT COUNT(*) AS number_of_products FROM Products
SELECT COUNT(*) AS number_of_2016_sales FROM Sales_2016
SELECT COUNT(*) AS number_of_2017_sales FROM Sales_2017

--f. To be doubly sure, check that no table contains duplicate rows.
--   EXCEPT is a set operator: "SELECT * EXCEPT SELECT DISTINCT *" on the same
--   table always returns zero rows, even when duplicates exist, so it cannot
--   be used for this check. Instead compare the total number of rows with the
--   number of distinct rows; each query must return 0.
SELECT
    (SELECT COUNT(*) FROM sales_2017)
  - (SELECT COUNT(*) FROM (SELECT DISTINCT * FROM sales_2017) AS distinct_rows)
    AS duplicate_2017_sales

SELECT
    (SELECT COUNT(*) FROM sales_2016)
  - (SELECT COUNT(*) FROM (SELECT DISTINCT * FROM sales_2016) AS distinct_rows)
    AS duplicate_2016_sales

SELECT
    (SELECT COUNT(*) FROM countries)
  - (SELECT COUNT(*) FROM (SELECT DISTINCT * FROM countries) AS distinct_rows)
    AS duplicate_countries

--Technical note: SQL Server's default collation is case INSENSITIVE, so if the
--only difference between two rows is capitals versus lower case then EXCEPT
--treats them as equal and eliminates them. If that matters, compare with a
--case-sensitive collation using COLLATE, for example:
	--SELECT Code, Product_line
	--		,OLTP.dbo.findstringto([Product_type/Product],'/') --Product_type
	--		,dbo.proper_case(OLTP.dbo.findstringfrom([Product_type/Product],'/')) 
	--		     COLLATE SQL_Latin1_General_CP1_CS_AS -- Case sensitive version
	--		,Product_cost
	--FROM ##products
	--EXCEPT
	--SELECT Code, Product_line,Product_type
	--		,Product COLLATE SQL_Latin1_General_CP1_CS_AS -- Case sensitive version
	--		,Product_cost 
	--FROM products 

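--You can see the server's default collation using
--SELECT SERVERPROPERTY ('Collation')
--and the collation of the current database using
--SELECT DATABASEPROPERTYEX(DB_NAME(), 'Collation')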